#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;
use Getopt::Long;
# Name of the running script with any leading directory stripped; it is
# interpolated into the usage message printed by usage().
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;

# Command-line settings: --input has no default, --output does.
my $input = "";
my $output = "metadata.txt";

{
    # Getopt::Long reports malformed or unknown options through warn().
    # Any such warning is turned into the usage message with exit status 1.
    # `local` restores the previous handler when this block is left, so
    # later warnings are reported normally without a hand-written
    # forwarding handler.
    local $SIG{__WARN__} = sub { usage(1); };
    GetOptions(
        "input=s"  => \$input,
        "output=s" => \$output,
    );
}

# An input file name is mandatory.  The test is on length rather than on
# truth so that a file literally named "0" is accepted.
usage(2) if $input eq "";
# Convert the tagged records read from $input into one semicolon-separated
# row per record in $output.  A record is a run of "XX value" lines that is
# terminated by a line starting with "ER".
#
# NOTE(review): only the line carrying the tag is kept; lines that do not
# start with one of the tags below are ignored, so a value that wraps onto
# following lines would be truncated -- confirm against the export format.

# Tags whose text is copied verbatim, listed in output column order.
my @etiquettes = qw(TI SO LA DT AB PU J9 PY SC);
my %colonne_de;
@colonne_de{@etiquettes} = (0 .. $#etiquettes);

# The UT column comes last and is rendered as an HTML link.
my $colonne_ut  = scalar @etiquettes;
my $nb_colonnes = $colonne_ut + 1;

# One pattern for all verbatim tags: $1 is the tag, $2 the value.
my $alternatives = join "|", @etiquettes;
my $re_champ     = qr/^($alternatives) (.*)/;

open(my $wos, "<:utf8", $input) or die "Couldn't open file \"$input\", $!";
open(my $meta, ">:utf8", $output) or die "Couldn't open file \"$output\", $!";

# Header row: same column names and order as the data rows.
print {$meta} join(";", @etiquettes, "UT"), "\n";

# Every record starts with all columns empty, so a record lacking some
# fields still yields a full-width row and never contains undef.
my @valeurs = ("") x $nb_colonnes;

# Read line by line instead of loading the whole file into a list first.
while (my $ligne = <$wos>) {
    chomp($ligne);
    $ligne =~ s/\r//g;    # tolerate CRLF line endings

    if ($ligne =~ $re_champ) {
        $valeurs[ $colonne_de{$1} ] = $2;
    }
    elsif ($ligne =~ /^UT ISTEX:(.*)/) {
        # Anchored like the other tags so that the text "UT ISTEX:" in the
        # middle of some other line is not taken for an identifier.
        my $lien = $1;
        $valeurs[$colonne_ut] = "<a href=\"https://api.istex.fr/document/".$lien."/fulltext/pdf?sid=scodex\" target=\"_blank\">".$lien."</a>";
    }
    elsif ($ligne =~ /^ER/) {
        # End of record: quote the values that need CSV protection
        # (those containing a comma, a semicolon or a double quote),
        # doubling embedded double quotes.
        my @champs = map {
            my $champ = $_;
            if ($champ =~ /[,;"]/) {
                $champ =~ s/"/""/g;
                $champ = '"' . $champ . '"';
            }
            $champ;
        } @valeurs;

        print {$meta} join(";", @champs), "\n";
        @valeurs = ("") x $nb_colonnes;
    }
}

close($wos);
# Buffered write errors only surface when the handle is closed.
close($meta) or die "Couldn't close file \"$output\", $!";
exit 0;
# Print the one-line command synopsis on the currently selected output
# handle, then terminate the script.
#
# Parameters:
#   $_[0] - status passed to exit().
#
# Never returns.
sub usage {
    my ($exit_status) = @_;

    print "Usage : $programme -i input [ -o output ]\n";

    exit $exit_status;
}